Find Species-Level Images of Tree Genera from AutoArborist by Reconciling Locations of City Tree Inventories

In [5]:
# Libraries
import pandas as pd
pd.set_option('display.max_rows', 500)
import os
import matplotlib.pyplot as plt
from PIL import Image

Read in Species-Rectified AutoArborist Dataset

Source data on GitHub: https://github.com/ncsu-landscape-dynamics/gsv_host_detector/tree/main/tree_inventory

In [2]:
# The AutoArborist dataset contains genus-level images and locations.
# This data was merged to city tree inventories with species-level location data by latitude/longitude.
# The CSV location defaults to the original local download path; set the
# AUTOARBORIST_CSV environment variable to read the file from another location
# without editing this cell.
aa_csv_path = os.environ.get("AUTOARBORIST_CSV") or r"C:/users/talake2/downloads/AutoArboristData.csv"
aa_data = pd.read_csv(aa_csv_path, low_memory=False)
aa_data.head()
Out[2]:
Index IDX SHAPE_LNG SHAPE_LAT GENUS TAXONOMY_ID location type Tree ID Tree Location IDX City rounded_lat rounded_lng Genus Genus Label Capture Time Street Img Tree Center X Street Img Tree Center Y Street Img genus_name species_name
0 0 077b8586-1f14-11ec-81f2-eb8801c6f8d0 -86.532817 39.159436 cornus 83 Bloomington train 8565243926414771871 077b8586-1f14-11ec-81f2-eb8801c6f8d0 Bloomington 39.159435 -86.532814 cornus 83 May 2019 257 576 NaN NaN
1 1 077b8694-1f14-11ec-81f2-eb8801c6f8d0 -86.532962 39.159282 cornus 83 Bloomington train 895590795344673619 077b8694-1f14-11ec-81f2-eb8801c6f8d0 Bloomington 39.159283 -86.532960 cornus 83 May 2019 387 576 NaN NaN
2 2 077b877a-1f14-11ec-81f2-eb8801c6f8d0 -86.531563 39.159272 cornus 83 Bloomington train 8300956921542110556 077b877a-1f14-11ec-81f2-eb8801c6f8d0 Bloomington 39.159270 -86.531560 cornus 83 May 2019 313 576 NaN NaN
3 3 077b8842-1f14-11ec-81f2-eb8801c6f8d0 -86.531343 39.159279 juniperus 156 Bloomington train 2921012368023539546 077b8842-1f14-11ec-81f2-eb8801c6f8d0 Bloomington 39.159280 -86.531340 juniperus 156 May 2019 521 576 NaN NaN
4 4 077b8900-1f14-11ec-81f2-eb8801c6f8d0 -86.530603 39.159408 quercus 247 Bloomington train 11472771202647275742 077b8900-1f14-11ec-81f2-eb8801c6f8d0 Bloomington 39.159410 -86.530600 quercus 247 May 2019 253 576 NaN NaN

Summarize Data by Genus

What proportion of AutoArborist Records Match Tree Inventories?

In [10]:
# Per-genus agreement between AutoArborist and the tree inventories:
# how often does AutoArborist's 'GENUS' equal the inventory's 'genus_name'?

# Case-insensitive string comparison of the two genus columns.
# The result is stored on aa_data because later cells aggregate the 'match' column.
aa_genus_lower = aa_data['GENUS'].str.lower()
inventory_genus_lower = aa_data['genus_name'].str.lower()
aa_data['match'] = aa_genus_lower == inventory_genus_lower

# Count records and matches per genus, derive the proportion of matches,
# keep genera with at least 500 records, and turn 'GENUS' back into a column
match_summary = (
    aa_data.groupby('GENUS')
    .agg(total_records=('match', 'size'), match_count=('match', 'sum'))
    .assign(match_proportion=lambda counts: counts['match_count'] / counts['total_records'])
    .loc[lambda counts: counts['total_records'] >= 500]
    .reset_index()
)

# Display the summary
match_summary
Out[10]:
GENUS total_records match_count match_proportion
0 abies 897 11 0.012263
1 acacia 4947 239 0.048312
2 acer 98044 1871 0.019083
3 aesculus 10027 222 0.022140
4 afrocarpus 2262 79 0.034925
5 agonis 905 31 0.034254
6 ailanthus 3824 100 0.026151
7 albizia 1612 36 0.022333
8 alnus 2410 49 0.020332
9 amelanchier 13186 192 0.014561
10 araucaria 1074 27 0.025140
11 arbutus 4951 39 0.007877
12 archontophoenix 889 28 0.031496
13 bauhinia 604 20 0.033113
14 betula 13374 299 0.022357
15 brachychiton 4152 128 0.030829
16 brahea 590 15 0.025424
17 callistemon 7621 283 0.037134
18 calocedrus 710 30 0.042254
19 carpinus 16075 289 0.017978
20 carya 825 11 0.013333
21 cassia 806 26 0.032258
22 casuarina 2998 84 0.028019
23 catalpa 7440 173 0.023253
24 cedrus 4677 132 0.028223
25 ceiba 585 3 0.005128
26 celtis 18883 296 0.015675
27 ceratonia 6725 183 0.027212
28 cercidiphyllum 6827 168 0.024608
29 cercis 11730 218 0.018585
30 chamaecyparis 2009 52 0.025884
31 chionanthus 724 21 0.029006
32 cinnamomum 8944 205 0.022920
33 citrus 2375 52 0.021895
34 cladrastis 3487 50 0.014339
35 cordyline 1509 31 0.020543
36 cornus 12803 287 0.022417
37 corylus 1692 34 0.020095
38 corymbia 3135 112 0.035726
39 cotinus 508 14 0.027559
40 crataegus 17802 306 0.017189
41 cupaniopsis 5207 163 0.031304
42 cupressus 7806 225 0.028824
43 dodonaea 612 47 0.076797
44 elaeagnus 1201 19 0.015820
45 eriobotrya 3841 96 0.024993
46 erythrina 899 29 0.032258
47 eucalyptus 11021 1315 0.119318
48 eucommia 1796 36 0.020045
49 fagus 6482 116 0.017896
50 ficus 9327 302 0.032379
51 fraxinus 77489 2189 0.028249
52 geijera 2223 49 0.022042
53 ginkgo 21017 396 0.018842
54 gleditsia 35141 663 0.018867
55 grevillea 1347 36 0.026726
56 gymnocladus 11689 198 0.016939
57 hibiscus 784 9 0.011480
58 ilex 1351 35 0.025907
59 jacaranda 7092 210 0.029611
60 juglans 5333 94 0.017626
61 juniperus 8082 210 0.025984
62 koelreuteria 8498 175 0.020593
63 lagerstroemia 17434 140 0.008030
64 lagunaria 923 44 0.047671
65 larix 916 21 0.022926
66 laurus 1570 49 0.031210
67 leptospermum 890 22 0.024719
68 ligustrum 8361 152 0.018180
69 liquidambar 28065 588 0.020951
70 liriodendron 7740 183 0.023643
71 lophostemon 4290 298 0.069464
72 maackia 2183 55 0.025195
73 magnolia 23636 431 0.018235
74 malus 34694 383 0.011039
75 maytenus 2806 79 0.028154
76 melaleuca 5086 187 0.036768
77 melia 752 19 0.025266
78 metasequoia 3531 85 0.024073
79 metrosideros 4485 201 0.044816
80 morus 7241 112 0.015467
81 myoporum 1613 46 0.028518
82 nerium 2304 39 0.016927
83 nyssa 4890 72 0.014724
84 olea 5496 157 0.028566
85 ostrya 2223 40 0.017994
86 parrotia 5555 135 0.024302
87 persea 1020 12 0.011765
88 phellodendron 1569 19 0.012110
89 phoenix 6473 214 0.033060
90 photinia 1063 15 0.014111
91 picea 22931 498 0.021717
92 pinus 27704 1178 0.042521
93 pistacia 8843 100 0.011308
94 pittosporum 6152 178 0.028934
95 platanus 31080 722 0.023230
96 podocarpus 4605 130 0.028230
97 populus 16840 404 0.023990
98 prunus 54859 944 0.017208
99 pseudotsuga 2775 76 0.027387
100 pyrus 36751 798 0.021714
101 quercus 58836 1216 0.020668
102 rhamnus 1861 56 0.030091
103 rhaphiolepis 769 27 0.035111
104 rhus 1334 23 0.017241
105 robinia 8206 175 0.021326
106 salix 1790 29 0.016201
107 schinus 6881 156 0.022671
108 sequoia 2892 127 0.043914
109 solanum 554 0 0.000000
110 sophora 1158 26 0.022453
111 sorbus 5418 101 0.018642
112 stewartia 807 19 0.023544
113 styphnolobium 2986 72 0.024113
114 styrax 4331 102 0.023551
115 syagrus 8084 33 0.004082
116 syringa 16650 321 0.019279
117 syzygium 1061 29 0.027333
118 tabebuia 955 27 0.028272
119 taxodium 2553 50 0.019585
120 thuja 6993 146 0.020878
121 tilia 51827 804 0.015513
122 tipuana 578 18 0.031142
123 trachycarpus 2017 47 0.023302
124 triadica 1849 2 0.001082
125 tristania 3099 168 0.054211
126 tristaniopsis 5632 357 0.063388
127 tsuga 665 17 0.025564
128 ulmus 57923 1520 0.026242
129 washingtonia 11295 261 0.023108
130 yucca 1698 33 0.019435
131 zelkova 18631 311 0.016693

Summarize Data by Genus and City

What proportion of AutoArborist Records Match Tree Inventories?

In [43]:
# Proportion of AutoArborist records whose genus matches the tree inventory,
# summarized for every (City, GENUS) pair

match_summary_city_genus = (
    aa_data.groupby(['City', 'GENUS'])
    .agg(total_records=('match', 'size'), match_count=('match', 'sum'))
    .assign(match_proportion=lambda counts: counts['match_count'] / counts['total_records'])
    # Drop city/genus pairs with fewer than 500 total_records
    .loc[lambda counts: counts['total_records'] >= 500]
    # Highest match_proportion first; 'City' and 'GENUS' become regular columns again
    .sort_values(by='match_proportion', ascending=False)
    .reset_index()
)

# Show the per-city rows for Acer
is_acer = match_summary_city_genus['GENUS'] == 'acer'
print(match_summary_city_genus[is_acer])
             City GENUS  total_records  match_count  match_proportion
36       Edmonton  acer           1674           67          0.040024
51      Cambridge  acer           4529          171          0.037757
63         Surrey  acer           4925          181          0.036751
83        Boulder  acer           3321          119          0.035833
92        Seattle  acer           6918          244          0.035270
141    Pittsburgh  acer           4350          142          0.032644
185       NewYork  acer           3975          122          0.030692
226        Denver  acer           3956          114          0.028817
240  SanFrancisco  acer           1271           35          0.027537
251       Calgary  acer           1008           27          0.026786
260     Kitchener  acer           4100          107          0.026098
275    SiouxFalls  acer           6585          160          0.024298
304       Buffalo  acer           8514          172          0.020202
355  WashingtonDc  acer           6735           58          0.008612
368     Vancouver  acer           6919           53          0.007660
377      Montreal  acer           6432           41          0.006374
404    LosAngeles  acer          10496           43          0.004097
460      Columbus  acer           7190            0          0.000000
489       SanJose  acer           3442            0          0.000000
513   Bloomington  acer           1036            0          0.000000
In [11]:
# Summarize Data - Proportion of Matches between AutoArborist and Tree Inventories for Juglans by City
#
# `match_summary_city_genus` (grouped by City and GENUS, limited to pairs with at
# least 500 records, sorted by match_proportion descending) is built in the
# preceding cell. It is reused here instead of being recomputed from an identical
# copy of that code, so the two views cannot drift apart.
print(match_summary_city_genus[match_summary_city_genus['GENUS'] == 'juglans'])
           City    GENUS  total_records  match_count  match_proportion
147  LosAngeles  juglans           1878           61          0.032481
475     SanJose  juglans           1141            0          0.000000

Filter images of Acer by Species

In [12]:
# Filter AutoArborist by 'GENUS' column reported in AutoArborist
acer_data = aa_data[aa_data['GENUS'].str.lower() == 'acer']

# Filter Tree Inventory data by 'genus_name' column reported in tree inventories,
# so only records where both sources agree on Acer remain
acer_data = acer_data[acer_data['genus_name'].str.lower() == 'acer']

# Filter Tree Inventory data by 'species_name' column reported in tree inventories.
# The lower-cased species column is computed once and reused for every species.
acer_species = acer_data['species_name'].str.lower()
red_maple_data = acer_data[acer_species == 'rubrum']
sugar_maple_data = acer_data[acer_species == 'saccharum']
silver_maple_data = acer_data[acer_species == 'saccharinum']
norway_maple_data = acer_data[acer_species == 'platanoides']

# Report the number of records per species. The count is interpolated into the
# f-string; the earlier form passed a placeholder-free f-string plus separate
# print arguments, which produced a double space after "There are".
for common_name, species_records in [
    ("red maple", red_maple_data),
    ("sugar maple", sugar_maple_data),
    ("silver maple", silver_maple_data),
    ("norway maple", norway_maple_data),
]:
    print(f"There are {len(species_records)} records for {common_name}")
There are  294 records for red maple
There are  90 records for sugar maple
There are  232 records for silver maple
There are  635 records for norway maple

Filter Images of Juglans by Species

In [13]:
# Filter AutoArborist by 'GENUS' column reported in AutoArborist
juglans_data = aa_data[aa_data['GENUS'].str.lower() == 'juglans']

# Filter Tree Inventory data by 'genus_name' column reported in tree inventories,
# so only records where both sources agree on Juglans remain
juglans_data = juglans_data[juglans_data['genus_name'].str.lower() == 'juglans']

# Filter Tree Inventory data by 'species_name' column reported in tree inventories
black_walnut_data = juglans_data[juglans_data['species_name'].str.lower() == 'nigra']

# The count is interpolated into the f-string; the earlier form passed a
# placeholder-free f-string plus separate print arguments, which produced a
# double space after "There are".
print(f"There are {len(black_walnut_data)} records for black walnut")
There are  35 records for black walnut

Images of Species from AutoArborist

Acer rubrum (Red Maple)

In [20]:
# Define the base directory for images.
# Defaults to the original local path; set the AUTOARBORIST_IMAGE_DIR environment
# variable to point at the image folder on another machine without editing this cell.
base_image_dir = os.environ.get("AUTOARBORIST_IMAGE_DIR") or r"C:/Users/talake2/Desktop/auto_arborist_cvpr2022_v015/jpegs_streetlevel_genus_idx_label"

# Function to display species images in a grid (by default 20 images in a 4x5 grid)
def display_species_images_grid(data, num_images=20, image_dir=None, ncols=5):
    """Plot the images belonging to the first `num_images` rows of `data` in a grid.

    Each image path is built from the row's 'type', 'GENUS' and 'IDX' columns as
    ``<image_dir>/<type>/<GENUS lower-cased>/images/<IDX>.jpeg``. Rows whose file
    does not exist are reported with a printed message and leave an empty cell.

    Parameters
    ----------
    data : pandas.DataFrame
        Records to plot. The columns 'type', 'GENUS', 'IDX', 'genus_name' and
        'species_name' are read from each row.
    num_images : int, default 20
        Maximum number of rows to plot. The grid is sized from this value, so
        the default still yields the 4x5 layout with a 20x16 figure.
    image_dir : str or None, default None
        Root directory of the images. When None, the notebook-level
        `base_image_dir` is looked up at call time.
    ncols : int, default 5
        Number of grid columns.

    Returns
    -------
    None
        The figure is shown with `plt.show()`.

    Raises
    ------
    ValueError
        If `num_images` or `ncols` is smaller than 1.
    """
    if num_images < 1 or ncols < 1:
        raise ValueError("num_images and ncols must both be at least 1")
    if image_dir is None:
        image_dir = base_image_dir

    # Limit the data to the number of images to display
    data = data.head(num_images)
    if data.empty:
        # Previously an empty frame crashed on the unbound loop index during cleanup
        print("No records to display")
        return

    # Ceiling division gives enough rows for num_images cells (4 rows for 20 images);
    # squeeze=False keeps `axes` two-dimensional even for a 1x1 grid so ravel() always works
    nrows = -(-num_images // ncols)
    _, axes = plt.subplots(nrows, ncols, figsize=(4 * ncols, 4 * nrows), squeeze=False)
    axes = axes.ravel()  # Flatten the 2D array of axes for easy iteration

    # Hide every axis frame up front; cells without an image simply stay blank
    for ax in axes:
        ax.axis('off')

    for ax, (_, row) in zip(axes, data.iterrows()):
        # Construct the file path based on the 'type', 'GENUS', and 'IDX' columns
        image_path = os.path.join(image_dir, row['type'], row['GENUS'].lower(), 'images', f"{row['IDX']}.jpeg")

        # Check if the image file exists
        if os.path.isfile(image_path):
            # The context manager closes the file handle once the image has been drawn
            with Image.open(image_path) as img:
                ax.imshow(img)
            ax.set_title(f"{row['genus_name']} - {row['species_name']}")
        else:
            print(f"Image not found for IDX: {row['IDX']}")

    plt.tight_layout()
    plt.show()

# Plot the red maple records (at most 20 images, the function's default num_images)
display_species_images_grid(red_maple_data)
Image not found for IDX: 33ba0085-f4d2-3d46-9c40-80073794b386

Images of Species from AutoArborist

Acer saccharum (Sugar Maple)

In [23]:
# Plot the sugar maple records (at most 20 images, the function's default num_images)
display_species_images_grid(sugar_maple_data)

Images of Species from AutoArborist

Acer saccharinum (Silver Maple)

In [24]:
# Plot the silver maple records (at most 20 images, the function's default num_images)
display_species_images_grid(silver_maple_data)

Images of Species from AutoArborist

Acer platanoides (Norway Maple)

In [25]:
# Plot the Norway maple records (at most 20 images, the function's default num_images)
display_species_images_grid(norway_maple_data)
Image not found for IDX: 4750df91-35de-3f75-a5cd-9adef2a3ea12
Image not found for IDX: 1a0c90ac-6a4f-39ba-b650-8413785f067e

Images of Species from AutoArborist

Juglans nigra (Black Walnut)

In [26]:
# Plot the black walnut records (at most 20 images, the function's default num_images)
display_species_images_grid(black_walnut_data)
In [ ]:
 
In [ ]:
# EOF